from google.colab import drive
import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Mount Google Drive at /content/drive; the dataset CSV is read from under this path.
drive.mount('/content/drive')
Mounted at /content/drive
# Load the hotel-bookings dataset from the mounted Google Drive.
df = pd.read_csv("/content/drive/My Drive/data/hotel_bookings.csv")

# Split the raw (uncleaned) bookings by the cancellation flag.
df_notcanceled = df[df['is_canceled'] == 0]
df_canceled = df[df['is_canceled'] == 1]

# Numeric columns of interest.
columns = ['lead_time', 'stays_in_weekend_nights', 'stays_in_week_nights', 'adults', 'children', 'babies', 'required_car_parking_spaces', 'adr', 'previous_cancellations', 'previous_bookings_not_canceled', 'booking_changes']

# Overview: one count plot per column (13 most frequent values), split by
# cancellation status. The grid height is derived from the number of columns
# so that every column gets a subplot slot; a fixed 6x5 grid is too small for
# a 32-column frame and makes plt.subplot raise on the 31st plot.
plot_grid_cols = 5
all_columns = list(df)
plot_grid_rows = -(-len(all_columns) // plot_grid_cols)  # ceiling division
plt.figure(figsize=(40, 5 * plot_grid_rows))
for n, column in enumerate(all_columns, start=1):
    plt.subplot(plot_grid_rows, plot_grid_cols, n)
    sns.countplot(x=column, data=df, hue='is_canceled', order=df[column].value_counts().iloc[:13].index)
plt.tight_layout()

# Column names, non-null counts and dtypes.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 119390 entries, 0 to 119389 Data columns (total 32 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 hotel 119390 non-null object 1 is_canceled 119390 non-null int64 2 lead_time 119390 non-null int64 3 arrival_date_year 119390 non-null int64 4 arrival_date_month 119390 non-null object 5 arrival_date_week_number 119390 non-null int64 6 arrival_date_day_of_month 119390 non-null int64 7 stays_in_weekend_nights 119390 non-null int64 8 stays_in_week_nights 119390 non-null int64 9 adults 119390 non-null int64 10 children 119386 non-null float64 11 babies 119390 non-null int64 12 meal 119390 non-null object 13 country 118902 non-null object 14 market_segment 119390 non-null object 15 distribution_channel 119390 non-null object 16 is_repeated_guest 119390 non-null int64 17 previous_cancellations 119390 non-null int64 18 previous_bookings_not_canceled 119390 non-null int64 19 reserved_room_type 119390 non-null object 20 assigned_room_type 119390 non-null object 21 booking_changes 119390 non-null int64 22 deposit_type 119390 non-null object 23 agent 103050 non-null float64 24 company 6797 non-null float64 25 days_in_waiting_list 119390 non-null int64 26 customer_type 119390 non-null object 27 adr 119390 non-null float64 28 required_car_parking_spaces 119390 non-null int64 29 total_of_special_requests 119390 non-null int64 30 reservation_status 119390 non-null object 31 reservation_status_date 119390 non-null object dtypes: float64(4), int64(16), object(12) memory usage: 29.1+ MB
# Display every row that has a missing value in at least one column.
df.loc[df.isnull().any(axis="columns")]
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | is_repeated_guest | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 3 | No Deposit | NaN | NaN | 0 | Transient | 0.00 | 0 | 0 | Check-Out | 2015-07-01 |
| 1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 4 | No Deposit | NaN | NaN | 0 | Transient | 0.00 | 0 | 0 | Check-Out | 2015-07-01 |
| 2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Direct | Direct | 0 | 0 | 0 | A | C | 0 | No Deposit | NaN | NaN | 0 | Transient | 75.00 | 0 | 0 | Check-Out | 2015-07-02 |
| 3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Corporate | Corporate | 0 | 0 | 0 | A | A | 0 | No Deposit | 304.0 | NaN | 0 | Transient | 75.00 | 0 | 0 | Check-Out | 2015-07-02 |
| 4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | 0.0 | 0 | BB | GBR | Online TA | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 240.0 | NaN | 0 | Transient | 98.00 | 0 | 1 | Check-Out | 2015-07-03 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 119385 | City Hotel | 0 | 23 | 2017 | August | 35 | 30 | 2 | 5 | 2 | 0.0 | 0 | BB | BEL | Offline TA/TO | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 394.0 | NaN | 0 | Transient | 96.14 | 0 | 0 | Check-Out | 2017-09-06 |
| 119386 | City Hotel | 0 | 102 | 2017 | August | 35 | 31 | 2 | 5 | 3 | 0.0 | 0 | BB | FRA | Online TA | TA/TO | 0 | 0 | 0 | E | E | 0 | No Deposit | 9.0 | NaN | 0 | Transient | 225.43 | 0 | 2 | Check-Out | 2017-09-07 |
| 119387 | City Hotel | 0 | 34 | 2017 | August | 35 | 31 | 2 | 5 | 2 | 0.0 | 0 | BB | DEU | Online TA | TA/TO | 0 | 0 | 0 | D | D | 0 | No Deposit | 9.0 | NaN | 0 | Transient | 157.71 | 0 | 4 | Check-Out | 2017-09-07 |
| 119388 | City Hotel | 0 | 109 | 2017 | August | 35 | 31 | 2 | 5 | 2 | 0.0 | 0 | BB | GBR | Online TA | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 89.0 | NaN | 0 | Transient | 104.40 | 0 | 0 | Check-Out | 2017-09-07 |
| 119389 | City Hotel | 0 | 205 | 2017 | August | 35 | 29 | 2 | 7 | 2 | 0.0 | 0 | HB | DEU | Online TA | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 9.0 | NaN | 0 | Transient | 151.20 | 0 | 2 | Check-Out | 2017-09-07 |
119173 rows × 32 columns
# Number of missing values in each column.
df.isnull().sum(axis=0)
hotel 0 is_canceled 0 lead_time 0 arrival_date_year 0 arrival_date_month 0 arrival_date_week_number 0 arrival_date_day_of_month 0 stays_in_weekend_nights 0 stays_in_week_nights 0 adults 0 children 4 babies 0 meal 0 country 488 market_segment 0 distribution_channel 0 is_repeated_guest 0 previous_cancellations 0 previous_bookings_not_canceled 0 reserved_room_type 0 assigned_room_type 0 booking_changes 0 deposit_type 0 agent 16340 company 112593 days_in_waiting_list 0 customer_type 0 adr 0 required_car_parking_spaces 0 total_of_special_requests 0 reservation_status 0 reservation_status_date 0 dtype: int64
# Drop columns that are not analysed further, then drop the rows that still
# contain a missing value:
#   - agent, company: very large number of missing values
#   - distribution_channel: treated as redundant with market_segment
df = df.drop(columns=['agent', 'company', 'distribution_channel']).dropna()
Removed distribution_channel because it is the same as market_segment, and agent and company because they have a very large number of missing values.
# Re-check the per-column missing-value counts after the clean-up.
df.isnull().sum(axis=0)
hotel 0 is_canceled 0 lead_time 0 arrival_date_year 0 arrival_date_month 0 arrival_date_week_number 0 arrival_date_day_of_month 0 stays_in_weekend_nights 0 stays_in_week_nights 0 adults 0 children 0 babies 0 meal 0 country 0 market_segment 0 is_repeated_guest 0 previous_cancellations 0 previous_bookings_not_canceled 0 reserved_room_type 0 assigned_room_type 0 booking_changes 0 deposit_type 0 days_in_waiting_list 0 customer_type 0 adr 0 required_car_parking_spaces 0 total_of_special_requests 0 reservation_status 0 reservation_status_date 0 dtype: int64
# Count the distinct values found in each column and print the result.
n = df.nunique(axis="index")
print("No.of.unique values in each column :\n", n)
No.of.unique values in each column : hotel 2 is_canceled 2 lead_time 479 arrival_date_year 3 arrival_date_month 12 arrival_date_week_number 53 arrival_date_day_of_month 31 stays_in_weekend_nights 15 stays_in_week_nights 33 adults 14 children 5 babies 5 meal 5 country 177 market_segment 7 is_repeated_guest 2 previous_cancellations 15 previous_bookings_not_canceled 73 reserved_room_type 10 assigned_room_type 12 booking_changes 21 deposit_type 3 days_in_waiting_list 128 customer_type 4 adr 8870 required_car_parking_spaces 5 total_of_special_requests 6 reservation_status 3 reservation_status_date 926 dtype: int64
# Preview the first five rows of the cleaned frame.
df.head(n=5)
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | is_repeated_guest | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 3 | No Deposit | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 2015-07-01 |
| 1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 4 | No Deposit | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 2015-07-01 |
| 2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Direct | Direct | 0 | 0 | 0 | A | C | 0 | No Deposit | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 2015-07-02 |
| 3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Corporate | Corporate | 0 | 0 | 0 | A | A | 0 | No Deposit | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 2015-07-02 |
| 4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | 0.0 | 0 | BB | GBR | Online TA | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 2015-07-03 |
# Booking counts for the ten most frequent guest countries.
top_countries = df['country'].value_counts().index[:10]
sns.countplot(
    data=df,
    x='country',
    order=top_countries,
    palette='colorblind',
)
plt.title('Top 10 Countries of Origin of the Guests', weight='bold')
plt.xlabel('Country')
plt.ylabel('Reservation Count')
Text(0, 0.5, 'Reservation Count')
# Share of all bookings contributed by each of the ten most frequent countries.
df['country'].value_counts(normalize=True).iloc[:10]
PRT 0.408636 GBR 0.102012 FRA 0.087596 ESP 0.072062 DEU 0.061288 ITA 0.031674 IRL 0.028386 BEL 0.019698 BRA 0.018705 NLD 0.017696 Name: country, dtype: float64
# Mean of is_canceled per hotel for the ten most frequent guest countries.
top_countries = df['country'].value_counts().index[:10]
sns.barplot(
    data=df,
    x="country",
    y="is_canceled",
    order=top_countries,
    hue="hotel",
    hue_order=["City Hotel", "Resort Hotel"],
    palette='Blues',
)
plt.title("cancelation on basis of country", weight='bold')
plt.ylabel("Cancelations %")
plt.xlabel("country")
plt.legend(loc="upper right")
plt.show()
Cancellations at the city hotel are higher than at the resort hotel for tourists from every country, so we can conclude that the share of confirmed bookings and check-ins is higher for the resort hotel.
NLD has a lower percentage of cancellations for the resort hotel, so an NLD booking at the resort hotel is more likely to be confirmed,
and DEU has a lower percentage of cancellations for the city hotel, so a DEU booking at the city hotel is more likely to be confirmed.
# Mean of is_canceled per hotel for each market segment (most frequent first).
segments_by_frequency = df['market_segment'].value_counts().index[:10]
sns.barplot(
    data=df,
    x="market_segment",
    y="is_canceled",
    order=segments_by_frequency,
    hue="hotel",
    hue_order=["City Hotel", "Resort Hotel"],
    palette='Set1',
)
plt.title("cancelation on basis of marketsegment", weight='bold')
plt.ylabel("Cancelations %")
plt.xlabel("market_segment")
plt.legend(loc="upper right")
plt.show()
# Bookings that were NOT canceled, per arrival month and hotel.
# Every month is shown, ordered from most to fewest bookings; a top-10
# cut-off would silently drop two of the twelve months from the chart.
months_not_canceled = df_notcanceled['arrival_date_month'].value_counts().index
sns.countplot(x="arrival_date_month", order=months_not_canceled, hue="hotel", hue_order=["City Hotel", "Resort Hotel"], data=df_notcanceled, palette='Set1')
plt.title("non-canceled bookings on basis of arrival_date_month", weight='bold')
plt.xlabel("arrival_date_month")
plt.ylabel("count")
plt.legend(loc="upper right")
plt.show()

# Bookings that WERE canceled, per arrival month and hotel (same layout).
months_canceled = df_canceled['arrival_date_month'].value_counts().index
sns.countplot(x="arrival_date_month", order=months_canceled, hue="hotel", hue_order=["City Hotel", "Resort Hotel"], data=df_canceled, palette='Set1')
plt.title("cancelation on basis of arrival_date_month", weight='bold')
plt.xlabel("arrival_date_month")
plt.ylabel("count")
plt.legend(loc="upper right")
plt.show()
As we can see, groups cancel more than other market segments.
There are no cancellations of the resort hotel from the aviation segment.
The lowest cancellation rate for the city hotel comes from the complementary market segment (the only case where city hotel cancellations are lower than resort hotel cancellations).
Fewer cancellations come from the direct segment,
so a direct market segment booking is more likely to be confirmed.
# Total bookings per market segment, most frequent segment first.
# The column is passed by keyword: a positional data vector triggers a
# FutureWarning in seaborn 0.11 and is misinterpreted from 0.12 onwards.
sns.countplot(x='market_segment', data=df, order=df['market_segment'].value_counts().index)
plt.title('Total Number of Bookings by market_segment', weight='bold')
plt.xticks(rotation=45)
plt.xlabel('Market Segment')
plt.ylabel('Reservation Count')
/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. FutureWarning
Text(0, 0.5, 'Reservation Count')
56402
# Mean of is_canceled per market segment, split by the is_repeated_guest flag.
segments_by_frequency = df['market_segment'].value_counts().index[:10]
sns.barplot(
    data=df,
    x="market_segment",
    y="is_canceled",
    order=segments_by_frequency,
    hue="is_repeated_guest",
    hue_order=[0, 1],
    palette='Set2',
)
plt.title("cancelation on basis of marketsegment on basis of repeated guest", weight='bold')
plt.xticks(rotation=45)
plt.xlabel("market_segment")
plt.ylabel("Cancelations %")
plt.legend(loc="upper right")
plt.show()
We can conclude that repeated guests cancel less, except in the groups segment.
Corporate repeated guests from NLD at the resort hotel have the highest chance of a confirmed check-in.
Online TA guests tend to be repeated guests more often.
# Total bookings per customer type, most frequent type first.
# The column is passed by keyword: a positional data vector triggers a
# FutureWarning in seaborn 0.11 and is misinterpreted from 0.12 onwards.
sns.countplot(x='customer_type', data=df, order=df['customer_type'].value_counts().index)
plt.title('Total Number of Bookings by Customer Type', weight='bold')
# The x axis shows customer types, so label it accordingly.
plt.xlabel('Customer Type')
plt.ylabel('Reservation Count')
/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. FutureWarning
Text(0, 0.5, 'Reservation Count')
# Mean of is_canceled per deposit type, split by customer type.
sns.barplot(x="deposit_type", y="is_canceled", order=df['deposit_type'].value_counts().iloc[:10].index, hue="customer_type", data=df, palette='Set3')
# The x axis is deposit_type, so the title names it.
plt.title("cancelation on basis of deposit_type on basis of customer_type", weight='bold')
plt.xlabel("deposit_type")
plt.ylabel("Cancelations %")
plt.legend(loc="upper left")
plt.show()
Cancellations by group customers with refundable and non-refundable deposits are negligible,
but cancellations for the non-refundable deposit type, excluding groups, are very high.
# Histogram of every numeric column.
# The figure size is passed straight to DataFrame.hist: handing it a single
# pre-made Axes makes pandas clear that figure and emit a UserWarning,
# because one subplot per column is needed.
df.hist(figsize=(15, 20))
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:3: UserWarning: To output multiple subplots, the figure containing the passed axes is being cleared This is separate from the ipykernel package so we can avoid doing imports until
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7f424a5af3d0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f424afb5310>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f424af61350>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f424a9626d0>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x7f424a8c38d0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f424aeff990>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f4259e82fd0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f424ac77c90>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x7f424ac77550>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f424b166d10>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f4259e53a50>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f424a681e90>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x7f424ac27550>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f424a94bb90>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f424b057290>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f424adba990>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x7f424b03cf50>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f424a705610>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f424b1fac90>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f424a76a490>]],
dtype=object)
# Pairwise relationships between the columns, coloured by cancellation status.
sns.pairplot(data=df, hue="is_canceled", height=3)
/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:306: UserWarning: Dataset has 0 variance; skipping density estimate. warnings.warn(msg, UserWarning)
<seaborn.axisgrid.PairGrid at 0x7f42419ec3d0>
# Numeric columns inspected for outliers.
columns = ['lead_time', 'stays_in_weekend_nights', 'stays_in_week_nights', 'adults', 'children', 'babies', 'required_car_parking_spaces', 'adr', 'previous_cancellations', 'previous_bookings_not_canceled', 'booking_changes']

# One box plot per column on a 4x4 grid.
# The data is passed as x=: a positional vector triggers a FutureWarning in
# seaborn 0.11 and is misinterpreted from 0.12 onwards.
plt.figure(figsize=(20, 15))
for n, column in enumerate(columns, start=1):
    plt.subplot(4, 4, n)
    sns.boxplot(x=df[column])
plt.tight_layout()
/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. FutureWarning /usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. FutureWarning /usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. FutureWarning /usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. FutureWarning /usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. FutureWarning /usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. 
FutureWarning /usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. FutureWarning /usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. FutureWarning /usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. FutureWarning /usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. FutureWarning /usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. FutureWarning
Box plots to check for outliers.
# Cap each numeric column to its [0.35%, 99.65%] quantile range and report
# the bounds used together with the skewness after capping.
for column in columns:
    lower_bound = df[column].quantile(0.0035)
    upper_bound = df[column].quantile(0.9965)
    print(column, "0.35% quantile", lower_bound)
    print(column, "99.65% quantile", upper_bound)
    # Raise values below the lower bound first, then lower values above the
    # upper bound.
    floored = np.where(df[column] < lower_bound, lower_bound, df[column])
    df[column] = np.where(floored > upper_bound, upper_bound, floored)
    print(df[column].skew())
lead_time 0.35% quantile 0.0 lead_time 99.65% quantile 510.0 1.2740614632943779 stays_in_weekend_nights 0.35% quantile 0.0 stays_in_weekend_nights 99.65% quantile 4.0 0.7337756720816102 stays_in_week_nights 0.35% quantile 0.0 stays_in_week_nights 99.65% quantile 10.0 1.442579439937437 adults 0.35% quantile 1.0 adults 99.65% quantile 3.0 -0.39041531267706353 children 0.35% quantile 0.0 children 99.65% quantile 2.0 3.9504928514300564 babies 0.35% quantile 0.0 babies 99.65% quantile 1.0 11.273699163521435 required_car_parking_spaces 0.35% quantile 0.0 required_car_parking_spaces 99.65% quantile 1.0 3.6511589224755103 adr 0.35% quantile 0.0 adr 99.65% quantile 289.0 0.9201647682817626 previous_cancellations 0.35% quantile 0.0 previous_cancellations 99.65% quantile 2.0 4.475412768953266 previous_bookings_not_canceled 0.35% quantile 0.0 previous_bookings_not_canceled 99.65% quantile 9.0 9.971737517970483 booking_changes 0.35% quantile 0.0 booking_changes 99.65% quantile 4.0 3.3958641878172173
Outliers were capped using the 0.35th percentile as the lower bound and the 99.65th percentile as the upper bound.
# Box plots of the same numeric columns, drawn again on a 4x4 grid.
# The data is passed as x=: a positional vector triggers a FutureWarning in
# seaborn 0.11 and is misinterpreted from 0.12 onwards.
plt.figure(figsize=(20, 15))
for n, column in enumerate(columns, start=1):
    plt.subplot(4, 4, n)
    sns.boxplot(x=df[column])
plt.tight_layout()
/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. FutureWarning /usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. FutureWarning /usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. FutureWarning /usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. FutureWarning /usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. FutureWarning /usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. 
FutureWarning /usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. FutureWarning /usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. FutureWarning /usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. FutureWarning /usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. FutureWarning /usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. FutureWarning
# Turn the arrival month into an ordered categorical that follows the calendar.
ordered_months = [
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
]
df['arrival_date_month'] = pd.Categorical(
    df['arrival_date_month'],
    categories=ordered_months,
    ordered=True,
)

# ADR per arrival month, one line per hotel.
plt.figure(figsize=(12, 6))
sns.lineplot(data=df, x='arrival_date_month', y="adr", hue='hotel')
plt.show()
# Bookings that were not canceled.
df_not_canceled = df[df['is_canceled'] == 0]

# Number of non-canceled bookings per arrival year.
# NOTE(review): this replaces a call to `get_count`, which is not defined in
# the visible file and raised NameError; it is assumed to have returned the
# categories and their counts. Confirm against the original helper.
year_counts = df_not_canceled['arrival_date_year'].value_counts().sort_index()
x, y = year_counts.index, year_counts.values
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-33-4fbef0008efd> in <module>() ----> 1 x,y = get_count(df_not_canceled['arrival_date_year']) NameError: name 'get_count' is not defined
# Store the year as text so it is plotted as a category.
df['arrival_date_year'] = df['arrival_date_year'].astype(str)

# ADR per arrival year, one line per hotel.
# NOTE(review): the y argument was left empty (a syntax error); "adr" is
# assumed, matching the other line plots in this file. Confirm.
plt.figure(figsize=(12, 6))
sns.lineplot(x='arrival_date_year', y='adr', hue='hotel', data=df)
plt.show()
# ADR per arrival month, one line per cancellation status (canceled listed first).
plt.figure(figsize=(12, 6))
sns.lineplot(
    data=df,
    x="arrival_date_month",
    y="adr",
    hue="is_canceled",
    hue_order=[1, 0],
    palette='Set1',
)
plt.title("Relationship between ADR and Arrival Month by Booking cancellation status", weight='bold')
plt.xticks(rotation=45)
plt.xlabel("Arrival Month")
plt.ylabel("Average Daily Rate")
plt.legend(loc="upper right")
plt.show()
We can conclude that ADR peaks in August, i.e. the busiest month.
# Correlation heatmap of the numeric columns.
# Non-numeric columns are excluded explicitly: DataFrame.corr() on a frame
# that still contains string columns raises on pandas 2.x.
fig, ax = plt.subplots(figsize=(22, 15))
sns.heatmap(df.select_dtypes(include="number").corr(), annot=True, ax=ax);
Lead time has the most impact on cancellations: as lead time increases, the chance of cancellation increases,
and in the case of special requests, the more special requests there are, the lower the chance of cancellation.
The city hotel is the most booked and the most canceled, but the cancellation rate is lower for the resort hotel, which suggests that guests who book the resort hotel are more certain they will not need to cancel.
In conclusion, after analysing everything, most guests prefer to visit in summer (the busiest months are mostly May, July and August).
At least one parking space is required for every guest.
Maintain the rooms with families with babies or children in mind (most visits are by 2 guests with no babies or children), so increasing room space may be helpful.
There are more repeated guests; asking them what makes them visit again and again may be helpful.
An ADR of 65 shows good results; 0 cannot be considered because it means the stay was free.
Most guests stayed for up to 2 nights and preferred BB, which suggests that morning arrivals are more common.
Most guests prefer no deposit, which means they pay on arrival.
As we can see from the heatmap, repeated guests and previous bookings not canceled are correlated, ADR increases as the number of adults and children increases, and most of the special requests are for car parking space.